{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Importing data with genfromtxt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 2., 3.],\n",
       "       [4., 5., 6.]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from io import StringIO\n",
    "\n",
    "# Defining the input\n",
    "data = u\"1,2,3\\n4,5,6\"\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  1.,   2.,   3.],\n",
       "       [  4.,   5.,  67.],\n",
       "       [890., 123.,   4.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"  1  2  3\\n  4  5 67\\n890123  4\"\n",
    "np.genfromtxt(StringIO(data), delimiter=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1234.,  567.,   89.],\n",
       "       [   4.,    7.,    9.],\n",
       "       [   4.,  567.,    9.]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"123456789\\n   4  7 9\\n   4567 9\"\n",
    "np.genfromtxt(StringIO(data), delimiter=(4, 3, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([['1', ' abc ', ' 2'],\n",
       "       ['3', ' xxx', ' 4']], dtype='<U5')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"1, abc , 2\\n 3, xxx, 4\"\n",
    "# Without autostrip\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\", dtype=\"|U5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([['1', 'abc', '2'],\n",
       "       ['3', 'xxx', '4']], dtype='<U5')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# With autostrip\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\", dtype=\"|U5\", autostrip=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 2.],\n",
       "       [3., 4.],\n",
       "       [5., 6.],\n",
       "       [7., 8.],\n",
       "       [9., 0.]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"\"\"#\n",
    "# Skip me !\n",
    "# Skip me too !\n",
    "1, 2\n",
    "3, 4\n",
    "5, 6 #This is the third line of the data\n",
    "7, 8\n",
    "# And here comes the last line\n",
    "9, 0\n",
    "\"\"\"\n",
    "np.genfromtxt(StringIO(data), comments=\"#\", delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"\\n\".join(str(i) for i in range(10))\n",
    "np.genfromtxt(StringIO(data),)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3., 4.])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.genfromtxt(StringIO(data), skip_header=3, skip_footer=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 3.],\n",
       "       [4., 6.]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"1 2 3\\n4 5 6\"\n",
    "np.genfromtxt(StringIO(data), usecols=(0, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"1 2 3\\n4 5 6\"\n",
    "np.genfromtxt(StringIO(data), names=\"a, b, c\", usecols=(\"a\", \"c\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.genfromtxt(StringIO(data), names=\"a, b, c\", usecols=(\"a, c\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1, 2, 3), (4, 5, 6)],\n",
       "      dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"1 2 3\\n 4 5 6\")\n",
    "np.genfromtxt(data, dtype=[(_, int) for _ in \"abc\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., 2., 3.), (4., 5., 6.)],\n",
       "      dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8')])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"1 2 3\\n 4 5 6\")\n",
    "np.genfromtxt(data, names=\"A, B, C\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., 2., 3.), (4., 5., 6.)],\n",
       "      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"So it goes\\n#a b c\\n1 2 3\\n 4 5 6\")\n",
    "np.genfromtxt(data, skip_header=1, names=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1, 2., 3), (4, 5., 6)],\n",
       "      dtype=[('A', '<i8'), ('B', '<f8'), ('C', '<i8')])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"1 2 3\\n 4 5 6\")\n",
    "ndtype=[('a',int), ('b', float), ('c', int)]\n",
    "names = [\"A\", \"B\", \"C\"]\n",
    "np.genfromtxt(data, names=names, dtype=ndtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1, 2., 3), (4, 5., 6)],\n",
       "      dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<i8')])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"1 2 3\\n 4 5 6\")\n",
    "np.genfromtxt(data, dtype=(int, float, int))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1, 2., 3), (4, 5., 6)],\n",
       "      dtype=[('a', '<i8'), ('f0', '<f8'), ('f1', '<i8')])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"1 2 3\\n 4 5 6\")\n",
    "np.genfromtxt(data, dtype=(int, float, int), names=\"a\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1, 2., 3), (4, 5., 6)],\n",
       "      dtype=[('var_00', '<i8'), ('var_01', '<f8'), ('var_02', '<i8')])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = StringIO(\"1 2 3\\n 4 5 6\")\n",
    "np.genfromtxt(data, dtype=(int, float, int), defaultfmt=\"var_%02i\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., nan, 45.), (6., nan,  0.)],\n",
       "      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "convertfunc = lambda x: float(x.strip(b\"%\"))/100.\n",
    "data = u\"1, 2.3%, 45.\\n6, 78.9%, 0\"\n",
    "names = (\"i\", \"p\", \"n\")\n",
    "# General case .....\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\", names=names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., 0.023, 45.), (6., 0.789,  0.)],\n",
       "      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Converted case ...\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\", names=names, converters={1: convertfunc})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([(1., 0.023, 45.), (6., 0.789,  0.)],\n",
       "      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using a name for the converter ...\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\", names=names, converters={\"p\": convertfunc})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[   1., -999.,    3.],\n",
       "       [   4.,    5.,    6.]])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = u\"1, , 3\\n 4, 5, 6\"\n",
    "convert = lambda x: float(x.strip() or -999)\n",
    "np.genfromtxt(StringIO(data), delimiter=\",\", converters={1: convert})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
